# -*- coding: utf-8 -*-
"""
Created on Fri Feb  7 12:02:21 2020

@author: chen-zefeng
"""

#note: need pandas version < 1.4.0

import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.stattools as smtsa

#go to the directory where this file is located
import os
os.chdir(os.path.dirname(os.path.abspath(__file__)))

dataQ = pd.read_excel('Q_1946m1-2018m12.xlsx',sheet_name='UK Gilts Q')
dataQ2 = pd.read_excel('Q_1900m11-1947m12.xlsx',sheet_name='UK Gilts Q old')
dataQ=dataQ.append(dataQ2,ignore_index=True)


dataQ2 = pd.read_excel('Q_1881m1-1947m12.xlsx',sheet_name='UK Gilts Q old')
dataQ=dataQ.append(dataQ2,ignore_index=True)
dataQ2 = pd.read_excel('Q_1860m3-1881m2.xlsx',sheet_name='UK Gilts Q old')
dataQ=dataQ.append(dataQ2,ignore_index=True)
dataQ2 = pd.read_excel('Q_1694m3-1860m2.xlsx',sheet_name='UK Gilts Q old')
dataQ=dataQ.append(dataQ2,ignore_index=True)


dataP = pd.read_excel('P_1976m1-2018m12.xlsx',sheet_name='UK Gilts 1975 onwards')
dataP2 = pd.read_excel('P_1963m1-1975m12.xlsx',sheet_name='UK Gilts')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1949m1-1962m12.xlsx',sheet_name='UK Gilts')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1881m1-1949m3.xlsx',sheet_name='UK Gilts P old')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1860m3-1888m2.xlsx',sheet_name='UK Gilts P old')
dataP=dataP.append(dataP2,ignore_index=True)

dataP2 = pd.read_excel('P_1729m7-1860m2.xlsx',sheet_name='UK Gilts P old')
dataP=dataP.append(dataP2,ignore_index=True)


#########
dataQ['Day']=1;
dataQ['Date']=pd.to_datetime(dataQ[['Year','Month','Day']])
dataQ=dataQ.drop(columns=['Year','Month','Day'])


dataP['Day']=1;
dataP['Date']=pd.to_datetime(dataP[['Year','Month','Day']])
dataP=dataP.drop(columns=['Year','Month','Day'])

timeline=np.sort(list(set(dataP['Date'])))

totaldebt=pd.DataFrame(index=timeline)
totaldebt['debt']=np.nan

for i in timeline:
    data_trun=dataP.loc[dataP['Date']==i]
    data_trunQ=dataQ.loc[dataQ['Date']==i]
    
    
    ##Get Isincode
    isincodelist=list(data_trun['ID'])
    isincodelistQ=list(data_trunQ['ID'])
    value=0;
    
    for j in isincodelist:
        if j in isincodelistQ:
            if data_trunQ.loc[data_trunQ['ID']==j]['Q'].values.size>0:      
                value=value+data_trun.loc[data_trun['ID']==j]['P'].iloc[0]*data_trunQ.loc[data_trunQ['ID']==j]['Q'].iloc[0]/100
            else:
                print(j+' not found in time'+i)
    
    totaldebt.loc[i,'debt']=value
    
totaldebt.to_csv('totaldebt_new.csv',index=True)   